## This program generates the bottom panels of Figure 5 from 
##  "Financial Constraints, Sectoral Heterogeneity, and the Cyclicality of Investment" by Cooper Howes

##### Step 0: Preliminaries #####

## Clear variables and data
rm(list=ls())

## Load packages
library(tidyverse)
library(magrittr)
library(dplyr)


##### Step 1: Download BDS data from the Census website #####


## The code below downloads the .csv files for 2020 (the most recent available vintage 
##  at the time of publication) directly. The links will need to be updated when
##  newer vintages are released, but the replication package also includes the 
##  original files from 2020 used in this analysis. 
##  More details here: https://www.census.gov/data/datasets/time-series/econ/bds/bds-datasets.html

## Sector-by-firm-age data
bds_data_firm_age <- read_csv("https://www2.census.gov/programs-surveys/bds/tables/time-series/bds2020_sec_fa.csv")

## Sector-by-firm-size data
bds_data_firm_size <- read_csv("https://www2.census.gov/programs-surveys/bds/tables/time-series/bds2020_sec_fz.csv")

## Aggregate data series by sector, firm age, and firm size
bds_sector_total <- read_csv("https://www2.census.gov/programs-surveys/bds/tables/time-series/bds2020_sec.csv")
bds_age_total <- read_csv("https://www2.census.gov/programs-surveys/bds/tables/time-series/bds2020_fa.csv")
bds_size_total <- read_csv("https://www2.census.gov/programs-surveys/bds/tables/time-series/bds2020_fz.csv")

## Convert data to numeric
bds_data_firm_age$firms  %<>% as.numeric
bds_data_firm_age$estabs  %<>% as.numeric
bds_data_firm_age$emp  %<>% as.numeric

bds_data_firm_size$firms  %<>% as.numeric
bds_data_firm_size$estabs  %<>% as.numeric
bds_data_firm_size$emp  %<>% as.numeric

bds_age_total$firms  %<>% as.numeric
bds_age_total$estabs  %<>% as.numeric
bds_age_total$emp  %<>% as.numeric


##### Step 2: Classify the data by industry #####


## Data codebook: https://www.census.gov/content/dam/Census/programs-surveys/business-dynamics-statistics/bds-codebook.pdf

## NAICS code definitions found here: https://www.bls.gov/iag/tgs/iag_index_naics.htm
## For durable/nondurable manufacturing split, see here: https://www.bls.gov/jlt/jltnaics.htm 

## Create list of all the unique two-digit NAICS codes in the data
naics_codes <- unique(bds_data_firm_age$sector)

## Manufacturing (31-33)
mfg_naics <- naics_codes[which(naics_codes=="31-33")]

## Construction (23)
constr_naics <- naics_codes[which(naics_codes=="23")]

## Retail trade (44-45)
retail_naics <- naics_codes[which(naics_codes=="44-45")]

## Transportation (48-49)
transpo_naics <- naics_codes[which(naics_codes=="48-49")]

## Information (51)
info_naics <- naics_codes[which(naics_codes=="51")]

## Real estate (52-53)
re_naics <- naics_codes[which(naics_codes=="53")]

## Education (61)
educ_naics <- naics_codes[which(naics_codes=="61")]

## Professional and business services
prof_naics <- naics_codes[which(naics_codes=="54")]



##### Step 3: Create aggregates by firm age and size #####


## Generate new sector classification 
bds_sector_total <- mutate(bds_sector_total,sector2=0)

bds_sector_total[bds_sector_total$sector %in% mfg_naics,"sector2"] <- 1
bds_sector_total[bds_sector_total$sector %in% constr_naics,"sector2"] <- 2
bds_sector_total[bds_sector_total$sector %in% retail_naics,"sector2"] <- 3
bds_sector_total[bds_sector_total$sector %in% transpo_naics,"sector2"] <- 4
bds_sector_total[bds_sector_total$sector %in% info_naics,"sector2"] <- 5
bds_sector_total[bds_sector_total$sector %in% re_naics,"sector2"] <- 6
bds_sector_total[bds_sector_total$sector %in% educ_naics,"sector2"] <- 7
bds_sector_total[bds_sector_total$sector %in% prof_naics,"sector2"] <- 8

## Collapse into coarser groupings
bds_sector_total_collapsed <- bds_sector_total %>% group_by(year,sector2) %>% 
  summarize_at(vars(emp,firms,estabs),sum)

bds_sector_total_collapsed %<>% mutate(.,sizedummy=2)
bds_sector_total_collapsed %<>% mutate(.,agedummy=2)

## Generate new size variable
bds_size_total <- mutate(bds_size_total,fsize2=substr(bds_size_total$fsize,1,1))

## Choose the smallest category to be counted as a large firm 
##  1) 1-4 employees, 2) 5-9, 3) 10-19, 4) 20-99, 5) 100-499, 6) 500-999, 
##  7) 1000-2499, 8) 2500-4999, 9) 5000-9999, 10) 10000+ 
sizecutoff <- 3

## Create size cutoff variables. 0 is small (defined above), 1 is large, 2 is total
bds_size_total <- mutate(bds_size_total,sizedummy=0)
bds_size_total[bds_size_total$fsize2 %in% letters[(sizecutoff):10],"sizedummy"] <- 1

## Collapse
bds_size_total_collapsed <- bds_size_total %>% group_by(year,sizedummy) %>% 
  summarize_at(vars(emp,firms,estabs),sum)


## Generate new age variable
bds_age_total <- mutate(bds_age_total,fage2=substr(bds_age_total$fage,1,1))

## Here you choose the youngest age category (in years) you want counted as an old firm (default: 9)
##  1) 0 years, 2) 1 year, 3) 2, 4) 3, 5) 4, 6) 5, 7) 6-10, 8) 11-15, 9) 16-20, 10) 21-25, 11) 26+,
##  12) Left censored (generally applies to firms at the start of the sample, who are treated as
##  having an unknown age)
agecutoff <- 9

## Create size cutoff variables. 0 is small (defined above), 1 is large, 2 is total
bds_age_total <- mutate(bds_age_total,agedummy=0)
bds_age_total[bds_age_total$fage2 %in% letters[(agecutoff):12],"agedummy"] <- 1


## Collapse
bds_age_total_collapsed <- bds_age_total %>% group_by(year,agedummy) %>% 
  summarize_at(vars(emp,firms,estabs),sum)

bds_size_total_collapsed %<>% mutate(.,sector2=9,agedummy=2)
bds_sector_total_collapsed %<>% mutate(.,agedummy=2,sizedummy=2)
bds_age_total_collapsed %<>% mutate(.,sector2=9,sizedummy=2)

bds_all <- bds_size_total_collapsed %>% group_by(year) %>%
  summarize_at(vars(emp,firms,estabs),sum) %>% mutate(.,sector2=9,sizedummy=2,agedummy=2)

bds_totals <- bind_rows(bds_size_total_collapsed,bds_sector_total_collapsed,bds_age_total_collapsed,bds_all) %>%
  arrange(year,sector2)



##### Step 4: Create measures of firm age and size within industries #####



## Generate new sector variable
bds_data_firm_size <- mutate(bds_data_firm_size,sector2=0)

## Generate sub-industry codes
# Manufacturing: 1
# Construction: 2
# Retail: 3
# Transpo: 4
# Info: 5
# Real estate: 6
# Education: 7
# Professional services: 8

bds_data_firm_size[bds_data_firm_size$sector %in% mfg_naics,"sector2"] <- 1
bds_data_firm_size[bds_data_firm_size$sector %in% constr_naics,"sector2"] <- 2
bds_data_firm_size[bds_data_firm_size$sector %in% retail_naics,"sector2"] <- 3
bds_data_firm_size[bds_data_firm_size$sector %in% transpo_naics,"sector2"] <- 4
bds_data_firm_size[bds_data_firm_size$sector %in% info_naics,"sector2"] <- 5
bds_data_firm_size[bds_data_firm_size$sector %in% re_naics,"sector2"] <- 6
bds_data_firm_size[bds_data_firm_size$sector %in% educ_naics,"sector2"] <- 7
bds_data_firm_size[bds_data_firm_size$sector %in% prof_naics,"sector2"] <- 8


## Generate new size variable that is just a substring of the full-length size variable
bds_data_firm_size <- mutate(bds_data_firm_size,fsize2=substr(bds_data_firm_size$fsize,1,1))

## Create size cutoff variables. 0 is small (defined above), 1 is large, 2 is total
bds_data_firm_size <- mutate(bds_data_firm_size,sizedummy=0)
bds_data_firm_size[bds_data_firm_size$fsize2 %in% letters[(sizecutoff):10],"sizedummy"] <- 1

## Collapse
bds_data_firm_size_collapsed <- bds_data_firm_size %>% group_by(year,sector2,sizedummy) %>% 
  summarize_at(vars(emp,firms,estabs),sum)

bds_data_firm_age$sector2 <- 0

bds_data_firm_age[bds_data_firm_age$sector %in% mfg_naics,"sector2"] <- 1
bds_data_firm_age[bds_data_firm_age$sector %in% constr_naics,"sector2"] <- 2
bds_data_firm_age[bds_data_firm_age$sector %in% retail_naics,"sector2"] <- 3
bds_data_firm_age[bds_data_firm_age$sector %in% transpo_naics,"sector2"] <- 4
bds_data_firm_age[bds_data_firm_age$sector %in% info_naics,"sector2"] <- 5
bds_data_firm_age[bds_data_firm_age$sector %in% re_naics,"sector2"] <- 6
bds_data_firm_age[bds_data_firm_age$sector %in% educ_naics,"sector2"] <- 7
bds_data_firm_age[bds_data_firm_age$sector %in% prof_naics,"sector2"] <- 8


## Generate new age variable that is just a substring of the full-length size variable
bds_data_firm_age <- mutate(bds_data_firm_age,fage2=substr(bds_data_firm_age$fage,1,1))

## Here you choose the youngest age category (in years) you want counted as an old firm (default: 9)
##  1) 0 years, 2) 1 year, 3) 2, 4) 3, 5) 4, 6) 5, 7) 6-10, 8) 11-15, 9) 16-20, 10) 21-25, 11) 26+,
##  12) Left censored (generally applies to firms at the start of the sample, who are treated as
##  having an unknown age)

## Create age cutoff variables.  0 is young (defined above), 1 is old, 2 is total
bds_data_firm_age <- mutate(bds_data_firm_age,agedummy=0)
bds_data_firm_age[bds_data_firm_age$fage2 %in% letters[(agecutoff):12],"agedummy"] <- 1

## Collapse
bds_data_firm_age_collapsed <- bds_data_firm_age %>% group_by(year,sector2,agedummy) %>% 
  summarize_at(vars(emp,firms,estabs),sum)

bds_age_total <- bds_data_firm_age %>% group_by(year) %>% 
  summarize_at(vars(emp,firms,estabs),sum) %>% mutate(agedummy=2,sector2=9)



##### Step 5: Calculate size and age shares within industries #####


### Sector by age
age_data <- bind_rows(bds_data_firm_age_collapsed,bds_sector_total_collapsed,bds_age_total_collapsed,
                      bds_all)

## Reorder variables and drop size indicator
age_data <- arrange(age_data,year,sector2,agedummy)
age_data <- dplyr::select(age_data,-sizedummy)

## Reshape data
age_data_wide <- age_data %>% pivot_wider(names_from=agedummy,values_from=c(emp,estabs,firms))
age_data_wide <- age_data_wide[-c(which(is.na(age_data_wide$sector2)),which(age_data_wide$sector2==0)),]

### Sector by size
size_data <- bind_rows(bds_data_firm_size_collapsed,bds_sector_total_collapsed,bds_size_total_collapsed,bds_all)

## Reorder variables and drop age indicator
size_data <- arrange(size_data,year,sector2,agedummy)
size_data <- dplyr::select(size_data,-agedummy)

## Reshape data
size_data_wide <- size_data %>% pivot_wider(names_from=sizedummy,values_from=c(emp,estabs,firms))
size_data_wide <- size_data_wide[-c(which(is.na(size_data_wide$sector2)),which(size_data_wide$sector2==0)),]



##### Step 6: Create share plots #####


### Share of old firms

## The data start in 1978, so 1993 is the first year that a firm could show up as being
##  at least 16 years old (the baseline "old firm" age cutoff used in the paper).
startyear <- 1993

years <- startyear:2020

## Create shares of old firms in each sector. Because of missing values for the oldest age buckets 
##  in the early years, the "old share" is calculated as (1 - "young share")
age_data_plot <- age_data_wide %>% mutate(old_firm_share=1-firms_0/firms_2) %>% filter(year>=startyear)

## Choose colors for plotting
plotcolors <- c("red","forestgreen","orange","purple","salmon","green2","deeppink2")


## Reminder:
#
# Manufacturing: 1
# Construction: 2
# Retail: 3
# Transpo: 4
# Info: 5
# Real estate: 6
# Education: 7
# Professional services: 8

## Define names of industries to be plotted
plotnames <- c("Manufacturing","Construction","Retail Trade","Transportation",
               "Information","Real estate","Educational services",
               "Professional services")

## First, plot old firm share for first sector
windows(w=16,h=6)
par(oma=c(2.5,0,0,1))
m <- matrix(1:2,nrow=1,byrow = TRUE)
layout(mat = m)
plot(x=years,y=filter(age_data_plot,sector2==1)$old_firm_share,type="b",pch=1,lwd=3,col="blue",
     axes=F,frame=T,ann=F,ylim=c(0.185,0.575))
axis(1,cex.axis=1.5)
axis(2,cex.axis=1.5,at=pretty(c(0.185,0.575)),lab=paste0(pretty(c(0.185,0.575))*100, "%"),las=TRUE)


## Loop to add old firm shares for other sectors to the same plot
temppch <- 2

for(i in 2:8){
  
  lines(x=years,y=filter(age_data_plot,sector2==i)$old_firm_share,type="b",pch=temppch,lwd=3,col=plotcolors[i-1])
  
  temppch <- temppch+1
  
}

## Finally, add total share
lines(x=years,y=filter(age_data_plot,sector2==9)$old_firm_share,type="l",col="black",lwd=5)

title(main="Share of firms aged 16+ years by sector",
      col.main="blue",cex.main=1.5,font.main=2,line=0.5)


### Share of large firms

# Firm size can start earlier, since sizes are reported in all years
startyear <- 1978

years <- startyear:2020

## Create shares
size_data_plot <- size_data_wide %>% mutate(big_firm_share=firms_1/firms_2) %>% filter(year>=startyear)


## Plot large firm share for first sector
plot(x=years,y=filter(size_data_plot,sector2==1)$big_firm_share,type="b",pch=1,lwd=3,col="blue",
     axes=F,frame=T,ann=F,ylim=c(0.1,.5))
axis(1,cex.axis=1.5)
axis(2,cex.axis=1.5,at=pretty(c(0.1,.5)),lab=paste0(pretty(c(0.1,0.5))*100, "%"),las=TRUE)


## Loop to add large firm shares for other sectors to the same plot
temppch <- 2

for(i in 2:8){
  
  lines(x=years,y=filter(size_data_plot,sector2==i)$big_firm_share,type="b",pch=temppch,lwd=3,col=plotcolors[i-1])
  
  temppch <- temppch+1
  
}

## Add aggregate large firm share
lines(x=years,y=filter(size_data_plot,sector2==9)$big_firm_share,type="l",col="black",lwd=5)

title(main="Share of firms with 10+ employees by sector",
      col.main="blue",cex.main=1.5,font.main=2,line=0.5)


## Add legend
par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(0, 0, 0, 0), new = TRUE)
plot(0, 0, type = "n", bty = "n", xaxt = "n", yaxt = "n")
legend(x="bottom",inset=0,legend=c(plotnames,"All"),text.width=0.5,cex=1.25,
       lwd=2,pch=c(1:(temppch-1),NA),col=c("blue",plotcolors,"black"),ncol=3)



